%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import os
import shutil
import matplotlib.pyplot as plt
import numpy as np
import time
import pandas as pd
import json
import matplotlib.image as mpimg
import seaborn as sb
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms, models
from collections import OrderedDict
import h5py
from PIL import Image
# Image folders; test_dir is read again later when the submission is built.
train_dir = 'E:\\AI\\MLND_CN_P7_DogvsCat\\train2'
test_dir = 'E:\\AI\\MLND_CN_P7_DogvsCat\\test2'

# Import the features: every HDF5 file contributes one block of columns.
train_chunks, test_chunks = [], []
for filename in ("gap_Res.h5", "gap_Den.h5", "gap_VGG.h5"):
    with h5py.File(filename, 'r') as h:
        train_chunks.append(np.array(h['train']))
        test_chunks.append(np.array(h['test']))
        # Re-read on every pass, so the labels stored in the last file win.
        y_train = np.array(h['train_label'])

# Stack the per-file feature blocks side by side (axis=1 -> more columns).
X_train = np.concatenate(train_chunks, axis=1)
X_test = np.concatenate(test_chunks, axis=1)
# Width of the stacked feature matrix; the classifier's input size.
inputs_features = X_train.shape[1]

# Convert everything to float32 tensors.
X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_test = torch.from_numpy(X_test).float()

# Training batches are shuffled; test batches keep the stored row order.
Tr_sets = torch.utils.data.TensorDataset(X_train, y_train)
Te_sets = torch.utils.data.TensorDataset(X_test)
Tr_loader = torch.utils.data.DataLoader(Tr_sets, batch_size=128, shuffle=True)
Te_loader = torch.utils.data.DataLoader(Te_sets, batch_size=128)
# Define a simple classifier: dropout, one linear unit, sigmoid output.
classifier = nn.Sequential(OrderedDict([
    ('drop', nn.Dropout(p=0.5)),
    ('fc', nn.Linear(inputs_features, 1)),
    ('sigmoid', nn.Sigmoid()),
]))
# Move the model to its device BEFORE the optimizer is created, as the
# torch.optim documentation advises, so the optimizer is guaranteed to track
# the parameters that are actually trained. Falls back to the CPU when no
# CUDA device is available instead of raising.
classifier.float().to('cuda' if torch.cuda.is_available() else 'cpu')

# Binary cross-entropy on the sigmoid output.
cost = nn.BCELoss()
optimizer = optim.Adadelta(classifier.parameters(), lr=0.1)
# scheduler=torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.99)

# Show the dtype of every parameter as a quick sanity check.
for p in classifier.parameters():
    print(p.dtype)
# Define the training function and train.
# Three independent history dictionaries, all starting out empty.
accuracy, loss_dict, val_dict = {}, {}, {}
def train(model, loader):
    """Train ``model`` for one pass over ``loader`` and log progress.

    The function relies on module-level state rather than on arguments: the
    ``optimizer`` and ``cost`` objects, the counters ``e`` / ``epoch`` (used
    only to label log entries) and the ``accuracy`` / ``loss_dict`` /
    ``val_dict`` dictionaries. Every ``print_every`` steps one entry is
    written to each dictionary under the key ``"<epoch>.<step>"`` and a
    summary line is printed.

    Args:
        model: Network to optimise. Batches are moved to the device that
            holds its parameters.
        loader: Iterable yielding ``(inputs, labels)`` batches. ``labels`` is
            indexed as ``labels[:, None]``, so it is expected to be 1-D.

    Note:
        The value reported as "Valid Loss" is computed on the current
        *training* batch with the model in eval mode; no held-out data is
        involved.
    """
    since = time.time()
    print_every = 40
    # Follow the model instead of hard-coding CUDA so that batches and
    # parameters can never end up on different devices.
    device = next(model.parameters()).device
    running_loss = 0
    model.train()
    for steps, (inputs, labels) in enumerate(loader, start=1):
        inputs, labels = inputs.to(device), labels.to(device)
        # The model emits one column per sample; give the labels that shape.
        targets = labels[:, None]

        optimizer.zero_grad()
        loss = cost(model(inputs), targets)
        loss.backward()
        optimizer.step()
        # scheduler.step()
        running_loss += loss.item()

        if steps % print_every != 0:
            continue

        # Periodic report: re-score the current batch with dropout disabled.
        key = '{}.{}'.format(e + 1, steps)
        model.eval()
        with torch.no_grad():
            output = model(inputs)
            test_loss = cost(output, targets).item()
            # Threshold the whole batch at once instead of element by element.
            predicted = (output.reshape(-1) >= 0.5).float()
            batch_accuracy = (predicted == labels).float().mean().item()
        model.train()

        accuracy[key] = batch_accuracy
        loss_dict[key] = running_loss / print_every
        val_dict[key] = test_loss
        print("Epoch: {}/{}.. ".format(e + 1, epoch),
              "Training Loss: {:.3f}.. ".format(loss_dict[key]),
              "Valid Loss: {:.3f}.. ".format(val_dict[key]),
              "Train Accuracy: {:.3f}".format(accuracy[key]))
        running_loss = 0

    # Wall-clock seconds spent in this pass.
    since = time.time() - since
    print('Use', since)
# Number of passes over the training data. train() reads both `epoch` and the
# loop variable `e` as globals, so these names must stay as they are.
epoch = 8
for e in range(epoch):
    train(classifier, Tr_loader)
# torch.save(classifier, 'mixed{}.pth'.format(time.time()))
# Collect the outputs for the test set and save them as a CSV file.
def get_outputs(model, loader):
    """Score every batch of ``loader`` and write a submission CSV.

    The scores are clipped to ``[0.005, 0.995]``, paired with integer ids
    taken from the class-folder names of the module-level ``test_dir``,
    sorted by id and written to ``submission_<timestamp>.csv`` in the current
    directory. The file is then read back with ``id`` as index, shown with
    the notebook's ``display`` and returned.

    Args:
        model: Network producing one score per sample. It is moved to CUDA
            when available (CPU otherwise) and left in eval mode.
        loader: Iterable of batches whose first element is the input tensor.

    Returns:
        The ``pandas.DataFrame`` read back from the CSV, indexed by ``id``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()

    scores = []
    with torch.no_grad():
        for batch in loader:
            score = model(batch[0].to(device))
            # reshape(-1) always yields a 1-D tensor. A bare squeeze() turns a
            # one-sample batch into a 0-d tensor whose tolist() is a float,
            # which cannot be appended to the running list.
            scores.extend(score.reshape(-1).cpu().tolist())

    # NOTE(review): clipping presumably bounds the penalty of confident
    # mistakes under a log-loss metric -- confirm.
    clipped = np.array(scores).clip(min=0.005, max=0.995).tolist()

    # NOTE(review): assumes the class-folder names are numeric ids listed in
    # the same order as the rows served by `loader` -- confirm.
    classes = datasets.ImageFolder(test_dir).classes
    ids = np.array(classes).astype(int)

    outputs = pd.DataFrame({'id': ids, 'label': clipped}).sort_values('id')
    csv_name = 'submission_{}.csv'.format(time.time())
    # The row index is not needed in the saved file.
    outputs.to_csv(csv_name, index=False)
    # Use `id` as index when reading back; it makes look-ups convenient.
    csv_read = pd.read_csv(csv_name, index_col='id')
    display(csv_read)
    return csv_read
# Score the test set once; random_t() reads this module-level frame.
csv_read = get_outputs(classifier, Te_loader)
# Show some randomly chosen pictures; change the seed to see other ones.
def random_t(seed):
    """Plot 20 randomly drawn test images with their predicted class.

    Ids are drawn from 1..12500 (repeats are possible). For each id the score
    is looked up in the first column of the module-level ``csv_read`` frame
    and the picture ``test/<id>.jpg`` is shown, titled "dog" for scores of at
    least 0.5 and "cat" otherwise.

    Args:
        seed: Seed for NumPy's global random generator, so that the same
            seed always selects the same pictures.
    """
    np.random.seed(seed)
    r_list = np.random.randint(1, 12501, 20)
    for a in r_list:
        # Explicit positional access; plain `[0]` on a label-indexed Series
        # relies on a deprecated fallback.
        score = csv_read.loc[a].iloc[0]
        # Same >= 0.5 threshold as in training, so a score of exactly 0.5 is
        # shown as well instead of being silently skipped.
        animal = 'dog' if score >= 0.5 else 'cat'
        lena = mpimg.imread(os.path.join('test', '{}.jpg'.format(a)))
        plt.figure()
        plt.title('{} is a {}.Output is {:.4f}.'.format(a, animal, score))
        plt.imshow(lena)
random_t(12)

# Gather the three history dictionaries into one table, one row per log key.
df = pd.DataFrame({'accuracy':accuracy, 'loss_dict':loss_dict,'val_dict':val_dict})
df

plt.figure(figsize=(15, 15))
# One x position per logged row. A fixed count only fits one particular
# combination of epochs and dataset size and makes plt.plot raise on any other.
x = np.arange(len(df)) + 1
plt.plot(x, df['loss_dict'], label="Train Loss")
plt.plot(x, df['val_dict'], label="Valid Loss")
plt.plot(x, df['accuracy'], label='Accuracy')
plt.legend()
plt.show()